Data building script for grouping GD and PD estimates based on
| Var1 | Freq |
|---|---|
| FCA | 0.9290657 |
| LEO | 0.0034602 |
| PAT | 0.0017301 |
| PBE | 0.0034602 |
| PCO | 0.0294118 |
| PTI | 0.0034602 |
| PUN | 0.0294118 |
There are 135 population genetic studies and 314 population density studies in the raw datasets.
| sp | Popden | Popgen |
|---|---|---|
| Acinonyx jubatus | 5 | 16 |
| Caracal aurata | 5 | NA |
| Caracal caracal | NA | 1 |
| Felis bieti | NA | 1 |
| Felis chaus | 1 | 1 |
| Felis lybica | NA | 2 |
| Felis silvestris | 8 | 29 |
| Herpailurus yagouaroundi | NA | 1 |
| Leopardus colocolo | 3 | 12 |
| Leopardus geoffroyi | 4 | 4 |
| Leopardus guigna | NA | 10 |
| Leopardus guttulus | NA | 4 |
| Leopardus jacobita | 2 | 5 |
| Leopardus pardalis | 30 | 7 |
| Leopardus tigrinus | 1 | 1 |
| Leopardus wiedii | 5 | NA |
| Leptailurus serval | 4 | NA |
| Lynx canadensis | 1 | 56 |
| Lynx lynx | 14 | 19 |
| Lynx pardinus | 4 | 3 |
| Lynx rufus | 5 | 60 |
| Neofelis diardi | 11 | NA |
| Neofelis nebulosa | 3 | NA |
| Otocolobus manul | 1 | NA |
| Panthera leo | 35 | 65 |
| Panthera onca | 53 | 43 |
| Panthera pardus | 93 | 24 |
| Panthera tigris | 64 | 31 |
| Panthera uncia | 13 | 17 |
| Pardofelis marmorata | 4 | NA |
| Prionailurus bengalensis | 10 | 1 |
| Prionailurus rubiginosus | 1 | NA |
| Prionailurus viverrinus | 4 | NA |
| Puma concolor | 18 | 108 |
| sp | Popden | Popgen |
|---|---|---|
| Acinonyx jubatus | 3 | 6 |
| Caracal aurata | 1 | NA |
| Caracal caracal | NA | 1 |
| Felis bieti | NA | 1 |
| Felis chaus | 1 | 1 |
| Felis lybica | NA | 1 |
| Felis silvestris | 5 | 9 |
| Herpailurus yagouaroundi | NA | 1 |
| Leopardus colocolo | 2 | 4 |
| Leopardus geoffroyi | 3 | 3 |
| Leopardus guigna | NA | 2 |
| Leopardus guttulus | NA | 1 |
| Leopardus jacobita | 2 | 3 |
| Leopardus pardalis | 8 | 3 |
| Leopardus tigrinus | 1 | 1 |
| Leopardus wiedii | 2 | NA |
| Leptailurus serval | 3 | NA |
| Lynx canadensis | 1 | 2 |
| Lynx lynx | 6 | 14 |
| Lynx pardinus | 2 | 1 |
| Lynx rufus | 2 | 2 |
| Neofelis diardi | 2 | NA |
| Neofelis nebulosa | 3 | NA |
| Otocolobus manul | 1 | NA |
| Panthera leo | 16 | 15 |
| Panthera onca | 12 | 10 |
| Panthera pardus | 21 | 8 |
| Panthera tigris | 11 | 9 |
| Panthera uncia | 7 | 8 |
| Pardofelis marmorata | 4 | NA |
| Prionailurus bengalensis | 5 | 1 |
| Prionailurus rubiginosus | 1 | NA |
| Prionailurus viverrinus | 3 | NA |
| Puma concolor | 6 | 6 |
Example : tigers
j = "Panthera tigris"
## Subset to sp data -----------------------------------------------------------------------
sp_name <- sort(unique(gp_df$sp))
sp_df.i <- droplevels(gp_df[gp_df$sp %in% j,])
all(complete.cases(sp_df.i[,c("long","lat")]))
dups <- duplicated(sp_df.i[,c("long","lat","source")])
if (any(dups)) {
print(paste(length((dups[dups])), "Dups detected"))
sp_df <- sp_df.i[!dups,]
sp_df.ii <- sp_df.i[dups,]
} else sp_df <- sp_df.i
# - initialise column
sp_df$cluster_den <- NA
sp_df$cluster_gen <- NA
sp_df$closest_ID <- NA
sp_df$distance <- NA
sp_df$cluster_center <- NA
# Biogeographical realms from https://www.sciencebase.gov/catalog/item/508fece8e4b0a1b43c29ca22
# biogeo <- readOGR("www_ter_ecos.shp")
points <- sp::SpatialPoints(coords=sp_df[,c("long","lat")], proj4string = sp::CRS("+proj=longlat +datum=WGS84 +no_defs"))
biogeo.point <- raster::extract(x=biogeo[,c("REALM")], y=points)
sp_df <- cbind(sp_df, biogeo.point[,c("REALM")])
nbiogeo <- table(sp_df$REALM)
nbiogeo
kk <- tapply(sp_df$source, sp_df$REALM, n_distinct)
table(sp_df$REALM, sp_df$source)
# Which biogeo has both GD and PD
sp_df <- subset(sp_df, REALM %in% names(which(kk == 2)))
sp_df$REALM <- droplevels(sp_df$REALM)
#subset to biogeo
for (k in names(kk)){
bg_i <- which(sp_df$REALM == k)
# Check number of pop in each dataset
foo <- table(sp_df$source[bg_i])
print(k)
print(foo)
if(length(foo) == 0) next
# using Popden coordinates as centers
cls_df <- sp_df[bg_i,]
den_cls <- kmeans(cls_df[,c("long","lat")], centers=cls_df[which(cls_df$source == "Popden"),c("long","lat")], trace=TRUE)
cls_df[,"cluster_den"] <- den_cls$cluster
cls_df$cluster_den <- paste0(cls_df$cluster_den,"_", cls_df$REALM)
# using Popgen coordinates as centers
gen_cls <- kmeans(cls_df[,c("long","lat")], centers=cls_df[which(cls_df$source == "Popgen"),c("long","lat")], trace=TRUE)
cls_df[,"cluster_gen"] <- gen_cls$cluster
cls_df$cluster_gen <- paste0(cls_df$cluster_gen,"_",cls_df$REALM)
sp_df[bg_i, "cluster_den"] <- cls_df$cluster_den
sp_df[bg_i, "cluster_gen"] <- cls_df$cluster_gen
## based on gen
bg_df <- sp_df[bg_i, ]
# - calculate distances between the nearest points -Popgen <-> Popden, (in meters)
xx <- bg_df[which(bg_df$source == "Popgen"),c("long","lat")]
yy <- bg_df[which(bg_df$source == "Popden"),c("long","lat")]
mat_gen2den <- geosphere::distm(xx,
yy)
# get distance
bg_df[which(bg_df$source == "Popgen"),"distance"] <- apply(mat_gen2den, 1, min)
bg_df[which(bg_df$source == "Popden"),"distance"] <- apply(mat_gen2den, 2, min)
# get id of the closest and store it in bg_df$closest_ID
bg_df$closest_ID[which(bg_df$source == "Popden")] <- bg_df$ID[which(bg_df$source == "Popgen")][apply(mat_gen2den, 2, which.min)]
bg_df$closest_ID[which(bg_df$source == "Popgen")] <- bg_df$ID[which(bg_df$source == "Popden")][apply(mat_gen2den, 1, which.min)]
# - assign to the nearest cluster (not given by kmeans)
# sort df in ascending order of the distance
bg_df <- bg_df[order(bg_df$distance),]
# by row operation
# modify clus_den
for (i in 1:nrow(bg_df)){
x <- bg_df[i, "ID"]
# refresh cluster info
b <- xtabs(~cluster_den+source, data=bg_df)
nclus.d <- which(rowSums(b > 0) == 2)
clos_id <- bg_df[bg_df$ID == x,"closest_ID"]
if(!bg_df$cluster_den[bg_df$ID == clos_id] %in% names(nclus.d)){
bg_df$cluster_den[bg_df$ID == x] <- bg_df[bg_df$ID == clos_id,"cluster_den"]
}
}
# modify clus_gen
for (i in 1:nrow(bg_df)){
x <- bg_df[i, "ID"]
# refresh cluster info
b <- xtabs(~cluster_gen+source, data=bg_df)
nclus.g <- which(rowSums(b > 0) == 2)
clos_id <- bg_df[bg_df$ID == x,"closest_ID"]
if(!bg_df$cluster_gen[bg_df$ID == clos_id] %in% names(nclus.g)){
bg_df$cluster_gen[bg_df$ID == x] <- bg_df[bg_df$ID == clos_id,"cluster_gen"]
}
}
sp_df[bg_i,] <- bg_df
}
if(any(dups)){
poo <- merge(sp_df.ii[,c("long","lat","source")],
sp_df[,c("long","lat","source","cluster_den","cluster_gen","cluster_center", "closest_ID","distance")], all.x=T, all.y=F)
poop <- merge(sp_df.ii, unique(poo))
sp_df <- rbind(sp_df, poop)
}
# Sigma data -----------------------------------------------------------
sigma_df <- read.csv("Data/Sigma_data.csv")
Sigma data details:
sp_df <- merge(sp_df, sigma_df, by="sp", all.x=T)
sp_df$distance.lowerthanDI <- sp_df$distance <= sp_df$DI*1e3
gendata_sp <- subset(gen, sp %in% j)
dendata_sp <- subset(den, sp %in% j)
clus_sp <- subset(sp_df, sp %in% j)
clus_sp <- clus_sp[which(sp_df$distance.lowerthanDI == TRUE),]
# Separate data based on source
clus_sp_gen <- clus_sp[which(clus_sp$source == "Popgen"),c("country","long","lat","cluster_center","distance.lowerthanDI")]
clus_sp_gen <- unique(clus_sp_gen)
clus_sp_den <- clus_sp[which(clus_sp$source == "Popden"),c("ref","country","long","lat","cluster_center","distance.lowerthanDI")]
clus_sp_den <- unique(clus_sp_den)
x <- dplyr::left_join(gendata_sp,clus_sp_gen, by=c("country","long","lat"))
if(any(is.na(x$cluster_center))) x <- x[!is.na(x$cluster_center),]
xl[[j]] <- x
y <- dplyr::left_join(clus_sp_den, dendata_sp)
if(any(is.na(y$cluster_center))) y <- y[!is.na(y$cluster_center),]
y$cluster_center <- droplevels(as.factor(y$cluster_center))
yl[[j]] <- y
# # Calculate the DP average across studies in each cluster_center before merging with the GD
DP_mean <- aggregate(dp ~ cluster_center, data = y, FUN=mean, na.action=na.omit)
DP_max <- aggregate(dp ~ cluster_center, data = y, FUN=max, na.action=na.omit)
DP_sd <- aggregate(dp ~ cluster_center, data = y, FUN=sd, na.action=na.omit)
DP_n <- aggregate(dp ~ cluster_center, data = y, FUN=n_distinct, na.action=na.omit)
DP_median <- aggregate(dp ~ cluster_center, data = y, FUN=median, na.action=na.omit)
DP_mean <- data.frame(cluster_center=DP_max[,1], DP_max=DP_max[,2], DP_mean=DP_mean[,2], DP_sd=DP_sd[,2], DP_n=DP_n[,2], DP_median=DP_median[,2])
DP_ref <- tapply(y$ref, y$cluster_center, unique)
DP_ref <- lapply(DP_ref, paste, collapse=", ")
DP_ref <- do.call(rbind,DP_ref)
DP_ref <- data.frame(ref=DP_ref, cluster_center=rownames(DP_ref))
DP_mean <- merge(DP_mean, DP_ref, all.x=T)
DP_pop <- tapply(y$population, y$cluster_center, unique)
DP_pop <- lapply(DP_pop, paste, collapse=", ")
DP_pop <- do.call(rbind,DP_pop)
DP_pop <- data.frame(population=DP_pop, cluster_center=rownames(DP_pop))
DP_mean <- merge(DP_mean, DP_pop, all.x=T)
gen_dp <- dplyr::left_join(x, DP_mean, by="cluster_center", suffix=c(".gen",".den"))
| Species | Density records | Density studies | Genetic div. records | Genetic div. studies | Number of clusters with density and genetic records |
|---|---|---|---|---|---|
| Acinonyx jubatus | 7 | 3 | 2 | 2 | 2 |
| Felis silvestris | 19 | 9 | 6 | 4 | 6 |
| Leopardus colocolo | 4 | 2 | 2 | 1 | 2 |
| Leopardus jacobita | 4 | 2 | 2 | 1 | 2 |
| Leopardus pardalis | 15 | 3 | 11 | 4 | 3 |
| Lynx canadensis | 1 | 1 | 1 | 1 | 1 |
| Lynx lynx | 43 | 7 | 7 | 2 | 5 |
| Lynx pardinus | 4 | 3 | 4 | 2 | 3 |
| Lynx rufus | 12 | 4 | 5 | 2 | 4 |
| Panthera leo | 106 | 12 | 51 | 11 | 23 |
| Panthera onca | 99 | 25 | 44 | 13 | 23 |
| Panthera pardus | 66 | 26 | 20 | 13 | 18 |
| Panthera tigris | 150 | 31 | 33 | 19 | 19 |
| Panthera uncia | 30 | 13 | 14 | 5 | 8 |
| Puma concolor | 29 | 12 | 17 | 11 | 13 |
| Species | Density records | Density studies | Genetic div. records | Genetic div. studies | Number of countries with density and genetic records |
|---|---|---|---|---|---|
| Acinonyx jubatus | 12 | 4 | 12 | 5 | 2 |
| Felis chaus | 1 | 1 | 1 | 1 | 1 |
| Felis silvestris | 19 | 8 | 13 | 5 | 4 |
| Leopardus colocolo | 5 | 3 | 6 | 1 | 2 |
| Leopardus geoffroyi | 10 | 4 | 4 | 2 | 3 |
| Leopardus jacobita | 4 | 2 | 4 | 1 | 2 |
| Leopardus pardalis | 18 | 8 | 12 | 4 | 3 |
| Leopardus tigrinus | 2 | 1 | 1 | 1 | 1 |
| Lynx canadensis | 1 | 1 | 47 | 6 | 1 |
| Lynx lynx | 30 | 8 | 3 | 1 | 3 |
| Lynx pardinus | 5 | 4 | 4 | 2 | 1 |
| Lynx rufus | 12 | 4 | 61 | 8 | 1 |
| Panthera leo | 61 | 15 | 75 | 14 | 12 |
| Panthera onca | 92 | 35 | 50 | 15 | 9 |
| Panthera pardus | 199 | 43 | 24 | 15 | 7 |
| Panthera tigris | 153 | 61 | 35 | 19 | 9 |
| Panthera uncia | 33 | 12 | 12 | 5 | 6 |
| Puma concolor | 26 | 14 | 120 | 21 | 5 |
There are 683 PD and 484 GD records from a total of 354 publications, across 15 and 18 in cluster and country dataset respectively.
(as in Figure 1)